# Differential expression on Kallisto data
# Preliminary samples - 2016 dataset
# Packages and dependence
packageCheckClassic <- function(x){
#
for( i in x ){
if( ! require( i , character.only = TRUE ) ){
install.packages( i , dependencies = TRUE )
require( i , character.only = TRUE )
}
}
}
packageCheckClassic(c('goseq','DESeq2','adegenet','vsn','devtools','BiocManager','ggplot2','ggrepel','markdown','pheatmap','RColorBrewer','genefilter','gplots','vegan','dplyr','limma'))
## Loading required package: goseq
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
##
## Loading required package: DESeq2
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.1.3
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:geneLenDataBase':
##
## unfactor
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## Loading required package: adegenet
## Loading required package: ade4
##
## Attaching package: 'ade4'
## The following object is masked from 'package:GenomicRanges':
##
## score
## The following object is masked from 'package:BiocGenerics':
##
## score
##
## /// adegenet 2.1.10 is loaded ////////////
##
## > overview: '?adegenet'
## > tutorials/doc/questions: 'adegenetWeb()'
## > bug reports/feature requests: adegenetIssues()
## Loading required package: vsn
## Loading required package: devtools
## Loading required package: usethis
## Loading required package: BiocManager
## Bioconductor version '3.14' is out-of-date; the current release version '3.17'
## is available with R version '4.3'; see https://bioconductor.org/install
##
## Attaching package: 'BiocManager'
## The following object is masked from 'package:devtools':
##
## install
## Loading required package: ggplot2
## Loading required package: ggrepel
## Loading required package: markdown
## Loading required package: pheatmap
## Loading required package: RColorBrewer
## Loading required package: genefilter
##
## Attaching package: 'genefilter'
## The following objects are masked from 'package:MatrixGenerics':
##
## rowSds, rowVars
## The following objects are masked from 'package:matrixStats':
##
## rowSds, rowVars
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
##
## space
## The following object is masked from 'package:S4Vectors':
##
## space
## The following object is masked from 'package:stats':
##
## lowess
## Loading required package: vegan
## Loading required package: permute
##
## Attaching package: 'permute'
## The following object is masked from 'package:devtools':
##
## check
## Loading required package: lattice
## This is vegan 2.6-4
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: limma
## Warning: package 'limma' was built under R version 4.1.3
##
## Attaching package: 'limma'
## The following object is masked from 'package:DESeq2':
##
## plotMA
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
#BiocManager::install('tximport', force = TRUE)
#BiocManager::install('apeglm')
#BiocManager::install('ashr')
#BiocManager::install("EnhancedVolcano")
#BiocManager::install("arrayQualityMetrics")
#BiocManager::install("goseq")
if (!require(devtools)) install.packages("devtools")
devtools::install_github("yanlinlin82/ggvenn")
## Skipping install of 'ggvenn' from a github remote, the SHA1 (25fd3b55) has not changed since last install.
## Use `force = TRUE` to force installation
install_github("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis")
## Skipping install of 'pairwiseAdonis' from a github remote, the SHA1 (cb190f76) has not changed since last install.
## Use `force = TRUE` to force installation
library("pairwiseAdonis")
## Loading required package: cluster
library("adegenet")
library('ggvenn')
## Loading required package: grid
library('tximport')
library('apeglm')
library('ashr')
library('EnhancedVolcano')
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
library('BiocManager')
library('goseq')
source_url("https://raw.githubusercontent.com/obigriffith/biostar-tutorials/master/Heatmaps/heatmap.3.R")
## ℹ SHA-1 hash of file is "015fc0457e61e3e93a903e69a24d96d2dac7b9fb"
# Working environment and data loading
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
#candidateGenes<-read.csv('candidateGenes.csv',header=T,sep=',')
samplesSaccharina<-read.table('saccharinaDesignMulti.txt',header=T)
samplesHedophylum<-read.table('hedophylumDesignMulti.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/kelpProject/kallistoOutput'
outputPath<-'/Users/mmeynadier/Documents/kelpProject/DESeq2Output'
#setwd(dataPath)
# DDS object
# If data from kallisto
tx2geneSaccharina<-read.table('Saccharina_tx2gene',header=T)
tx2geneHedophylum<-read.table('Hedophylum_tx2gene',header=T)
#
# # Data importation - txImport
setwd(dataPath)
filesSaccharina<-paste0(samplesSaccharina$sample)
txiSaccharina<-tximport(files = filesSaccharina,type='kallisto',tx2gene = tx2geneSaccharina)
## Note: importing `abundance.h5` is typically faster than `abundance.tsv`
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8
## transcripts missing from tx2gene: 8998
## summarizing abundance
## summarizing counts
## summarizing length
filesHedophylum<-paste0(samplesHedophylum$sample)
txiHedophylum<-tximport(files = filesHedophylum,type='kallisto',tx2gene = tx2geneHedophylum)
## Note: importing `abundance.h5` is typically faster than `abundance.tsv`
## reading in files with read_tsv
## 1 2 3 4 5 6 7 8
## transcripts missing from tx2gene: 9442
## summarizing abundance
## summarizing counts
## summarizing length
ddsSaccharina<-DESeqDataSetFromTximport(txiSaccharina,colData=samplesSaccharina,design= ~treatment + mesocosm)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
ddsHedophylum<-DESeqDataSetFromTximport(txiHedophylum,colData=samplesHedophylum,design= ~treatment + mesocosm)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## using counts and average transcript lengths from tximport
# pre-filtering
keepSaccharina <- rowSums(counts(ddsSaccharina)) >= 10
ddsSaccharina <- ddsSaccharina[keepSaccharina,]
keepHedophylum <- rowSums(counts(ddsHedophylum)) >= 10
ddsHedophylum <- ddsHedophylum[keepHedophylum,]
# Differential expression analysis
ddsSaccharina<-DESeq(ddsSaccharina)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsSaccharina))
## [,1]
## [1,] "Intercept"
## [2,] "treatment_T1_vs_C"
## [3,] "treatment_T2_vs_C"
## [4,] "treatment_T3_vs_C"
## [5,] "mesocosm_M1_vs_M0"
## [6,] "mesocosm_M2_vs_M0"
ddsHedophylum<-DESeq(ddsHedophylum)
## estimating size factors
## using 'avgTxLength' from assays(dds), correcting for library size
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(ddsHedophylum))
## [,1]
## [1,] "Intercept"
## [2,] "treatment_T1_vs_C"
## [3,] "treatment_T2_vs_C"
## [4,] "treatment_T3_vs_C"
## [5,] "mesocosm_M1_vs_M0"
## [6,] "mesocosm_M2_vs_M0"
# Exploring the results - Saccharina
S_C_vs_T1 <- results(ddsSaccharina,contrast=c("treatment", "T1", "C"))
S_C_vs_T1_trimmed <- subset(S_C_vs_T1, padj < 0.05)
S_C_vs_T1_trimmed <- subset(S_C_vs_T1_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
S_C_vs_T2 <- results(ddsSaccharina,contrast=c("treatment", "T2", "C"))
S_C_vs_T2_trimmed <- subset(S_C_vs_T2, padj < 0.05)
S_C_vs_T2_trimmed <- subset(S_C_vs_T2_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
S_C_vs_T3 <- results(ddsSaccharina,contrast=c("treatment", "T3", "C"))
S_C_vs_T3_trimmed <- subset(S_C_vs_T3, padj < 0.05)
S_C_vs_T3_trimmed <- subset(S_C_vs_T3_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
S_T1_vs_T2 <- results(ddsSaccharina,contrast=c("treatment", "T2", "T1"))
S_T1_vs_T2_trimmed <- subset(S_T1_vs_T2, padj < 0.05)
S_T1_vs_T2_trimmed <- subset(S_T1_vs_T2_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
S_T1_vs_T3 <- results(ddsSaccharina,contrast=c("treatment", "T3", "T1"))
S_T1_vs_T3_trimmed <- subset(S_T1_vs_T3, padj < 0.05)
S_T1_vs_T3_trimmed <- subset(S_T1_vs_T3_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
S_T2_vs_T3 <- results(ddsSaccharina,contrast=c("treatment", "T3", "T2"))
S_T2_vs_T3_trimmed <- subset(S_T2_vs_T3, padj < 0.05)
S_T2_vs_T3_trimmed <- subset(S_T2_vs_T3_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
DESeq2::plotMA(S_C_vs_T1_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T1")

DESeq2::plotMA(S_C_vs_T2_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T2")

DESeq2::plotMA(S_C_vs_T3_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T3")

DESeq2::plotMA(S_T1_vs_T2_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T2")

DESeq2::plotMA(S_T1_vs_T3_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T3")

DESeq2::plotMA(S_T2_vs_T3_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT2 vs T3")

vsdSaccharina <- vst(ddsSaccharina, blind=T)
meanSdPlot(assay(vsdSaccharina))

ntd <- normTransform(ddsSaccharina)
meanSdPlot(assay(ntd))

select <- order(rowMeans(counts(ddsSaccharina,normalized=TRUE)),
decreasing=TRUE)[1:20]
df <- as.data.frame(colData(ddsSaccharina)[,c("treatment","mesocosm")])
pheatmap(assay(vsdSaccharina)[select,], cluster_rows=FALSE, show_rownames=F,
cluster_cols=FALSE, annotation_col=df)

pcaData <- plotPCA(vsdSaccharina, intgroup=c("treatment", "mesocosm"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color=treatment, shape=mesocosm)) +
geom_point(size=3) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() + theme_classic()

sampleDists <- dist(t(assay(vsdSaccharina)))
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsdSaccharina$treatment, vsdSaccharina$mesocosm, sep="-")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists,
col=colors)

count_tab_assay <- assay(vsdSaccharina)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad <- adonis2(data=samplesSaccharina,dist_tab_assay ~ treatment + mesocosm, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ treatment + mesocosm, data = samplesSaccharina, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## treatment 3 97266 0.39825 0.9256 0.649
## mesocosm 2 76913 0.31492 1.0979 0.321
## Residual 2 70053 0.28683
## Total 7 244232 1.00000
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesSaccharina$treatment)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 C vs T1 1 33853.90 0.9221985 0.2351229 0.6000000 1
## 2 C vs T2 1 34406.16 0.9984946 0.3329986 0.7500000 1
## 3 C vs T3 1 41524.33 1.1779746 0.2819487 0.3000000 1
## 4 T1 vs T2 1 23732.10 0.5758274 0.3654127 1.0000000 1
## 5 T1 vs T3 1 26776.56 0.6861412 0.2554375 1.0000000 1
## 6 T2 vs T3 1 28436.44 0.7719787 0.4356591 0.6666667 1
mod <- betadisper(dist_tab_assay,samplesSaccharina$treatment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## T1-C -7.942638 -29.28590 13.40062 0.5063862
## T2-C -151.493863 -178.49119 -124.49654 0.0001005
## T3-C -15.781330 -37.12459 5.56193 0.1233041
## T2-T1 -143.551225 -172.18621 -114.91624 0.0001395
## T3-T1 -7.838692 -31.21906 15.54168 0.5764828
## T3-T2 135.712533 107.07754 164.34752 0.0001650
mod <- betadisper(dist_tab_assay,samplesSaccharina$mesocosm)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## M1-M0 156.184407 31.57115 280.79766 0.0216549
## M2-M0 149.923883 21.22387 278.62390 0.0287003
## M2-M1 -6.260524 -91.38758 78.86653 0.9691174
ordered_S_C_vs_T1<- S_C_vs_T1_trimmed[order(S_C_vs_T1_trimmed$padj),]
ordered_S_C_vs_T2<- S_C_vs_T2_trimmed[order(S_C_vs_T2_trimmed$padj),]
ordered_S_C_vs_T3<- S_C_vs_T3_trimmed[order(S_C_vs_T3_trimmed$padj),]
ordered_S_T1_vs_T2<- S_T1_vs_T2_trimmed[order(S_T1_vs_T2_trimmed$padj),]
ordered_S_T1_vs_T3<- S_T1_vs_T3_trimmed[order(S_T1_vs_T3_trimmed$padj),]
ordered_S_T2_vs_T3<- S_T2_vs_T3_trimmed[order(S_T2_vs_T3_trimmed$padj),]
write.csv(ordered_S_C_vs_T1, file = '../DESeq2/Saccharina/S_C_VS_T1.csv',sep='')
## Warning in write.csv(ordered_S_C_vs_T1, file =
## "../DESeq2/Saccharina/S_C_VS_T1.csv", : attempt to set 'sep' ignored
write.csv(ordered_S_C_vs_T2, file = '../DESeq2/Saccharina/S_C_VS_T2.csv',sep='')
## Warning in write.csv(ordered_S_C_vs_T2, file =
## "../DESeq2/Saccharina/S_C_VS_T2.csv", : attempt to set 'sep' ignored
write.csv(ordered_S_C_vs_T3, file = '../DESeq2/Saccharina/S_C_VS_T3.csv',sep='')
## Warning in write.csv(ordered_S_C_vs_T3, file =
## "../DESeq2/Saccharina/S_C_VS_T3.csv", : attempt to set 'sep' ignored
write.csv(ordered_S_T1_vs_T2, file = '../DESeq2/Saccharina/S_T1_VS_T2.csv',sep='')
## Warning in write.csv(ordered_S_T1_vs_T2, file =
## "../DESeq2/Saccharina/S_T1_VS_T2.csv", : attempt to set 'sep' ignored
write.csv(ordered_S_T1_vs_T2, file ='../DESeq2/Saccharina/S_T1_VS_T2.csv',sep='')
## Warning in write.csv(ordered_S_T1_vs_T2, file =
## "../DESeq2/Saccharina/S_T1_VS_T2.csv", : attempt to set 'sep' ignored
write.csv(ordered_S_T1_vs_T3, file ='../DESeq2/Saccharina/S_T1_VS_T3.csv',sep='')
## Warning in write.csv(ordered_S_T1_vs_T3, file =
## "../DESeq2/Saccharina/S_T1_VS_T3.csv", : attempt to set 'sep' ignored
write.csv(ordered_S_T2_vs_T3, file ='../DESeq2/Saccharina/S_T2_VS_T3.csv',sep='')
## Warning in write.csv(ordered_S_T2_vs_T3, file =
## "../DESeq2/Saccharina/S_T2_VS_T3.csv", : attempt to set 'sep' ignored
# Hedophylum
H_C_vs_T1 <- results(ddsHedophylum,contrast=c("treatment", "T1", "C"))
H_C_vs_T1_trimmed <- subset(H_C_vs_T1, padj < 0.05)
H_C_vs_T1_trimmed <- subset(H_C_vs_T1_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
H_C_vs_T2 <- results(ddsHedophylum,contrast=c("treatment", "T2", "C"))
H_C_vs_T2_trimmed <- subset(H_C_vs_T2, padj < 0.05)
H_C_vs_T2_trimmed <- subset(H_C_vs_T2_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
H_C_vs_T3 <- results(ddsHedophylum,contrast=c("treatment", "T3", "C"))
H_C_vs_T3_trimmed <- subset(H_C_vs_T3, padj < 0.05)
H_C_vs_T3_trimmed <- subset(H_C_vs_T3_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
H_T1_vs_T2 <- results(ddsHedophylum,contrast=c("treatment", "T2", "T1"))
H_T1_vs_T2_trimmed <- subset(H_T1_vs_T2, padj < 0.05)
H_T1_vs_T2_trimmed <- subset(H_T1_vs_T2_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
H_T1_vs_T3 <- results(ddsHedophylum,contrast=c("treatment", "T3", "T1"))
H_T1_vs_T3_trimmed <- subset(H_T1_vs_T3, padj < 0.05)
H_T1_vs_T3_trimmed <- subset(H_T1_vs_T3_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
H_T2_vs_T3 <- results(ddsHedophylum,contrast=c("treatment", "T3", "T2"))
H_T2_vs_T3_trimmed <- subset(H_T2_vs_T3, padj < 0.05)
H_T2_vs_T3_trimmed <- subset(H_T2_vs_T3_trimmed, log2FoldChange > 2 | log2FoldChange < -2)
DESeq2::plotMA(H_C_vs_T1_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T1")

DESeq2::plotMA(H_C_vs_T2_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T2")

DESeq2::plotMA(H_C_vs_T3_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nC vs T3")

DESeq2::plotMA(H_T1_vs_T2_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T2")

DESeq2::plotMA(H_T1_vs_T3_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT1 vs T3")

DESeq2::plotMA(H_T2_vs_T3_trimmed,ylim=c(-50,50),main="MA-plot for the shrunken log2 fold changes\nT2 vs T3")

vsdHedophylum <- vst(ddsHedophylum, blind=T)
meanSdPlot(assay(vsdHedophylum))

ntd <- normTransform(ddsHedophylum)
meanSdPlot(assay(ntd))

select <- order(rowMeans(counts(ddsHedophylum,normalized=TRUE)),
decreasing=TRUE)[1:20]
df <- as.data.frame(colData(ddsHedophylum)[,c("treatment","mesocosm")])
pheatmap(assay(vsdHedophylum)[select,], cluster_rows=FALSE, show_rownames=F,
cluster_cols=FALSE, annotation_col=df)

pcaData <- plotPCA(vsdHedophylum, intgroup=c("treatment", "mesocosm"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color=treatment, shape=mesocosm)) +
geom_point(size=3) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() + theme_classic()

sampleDists <- dist(t(assay(vsdHedophylum)))
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsdHedophylum$treatment, vsdHedophylum$mesocosm, sep="-")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists,
col=colors)

count_tab_assay <- assay(vsdHedophylum)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad <- adonis2(data=samplesHedophylum,dist_tab_assay ~ treatment + mesocosm, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ treatment + mesocosm, data = samplesHedophylum, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## treatment 3 74595 0.51481 1.2364 0.121
## mesocosm 2 30082 0.20761 0.7479 0.934
## Residual 2 40220 0.27758
## Total 7 144896 1.00000
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesHedophylum$treatment)
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 C vs T1 1 25257.52 1.5996828 0.4443955 0.3333333 1
## 2 C vs T2 1 33516.99 2.0688708 0.5084631 0.3333333 1
## 3 C vs T3 1 28645.67 1.3980340 0.4114244 0.3333333 1
## 4 T1 vs T2 1 21663.63 1.4776619 0.4249010 0.3333333 1
## 5 T1 vs T3 1 17078.13 0.9012158 0.3106338 1.0000000 1
## 6 T2 vs T3 1 23027.39 1.1893305 0.3729091 0.3333333 1
mod <- betadisper(dist_tab_assay,samplesHedophylum$treatment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## T1-C -8.675808 -8.675808 -8.675808 0
## T2-C -6.272203 -6.272203 -6.272203 0
## T3-C 15.661914 15.661914 15.661914 0
## T2-T1 2.403605 2.403605 2.403605 0
## T3-T1 24.337722 24.337722 24.337722 0
## T3-T2 21.934117 21.934117 21.934117 0
mod <- betadisper(dist_tab_assay,samplesHedophylum$mesocosm)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## M1-M0 -32.369342 -84.333467 19.59478 0.2008276
## M2-M0 3.426461 -41.575791 48.42871 0.9669503
## M2-M1 35.795803 -9.206449 80.79806 0.1046991
ordered_H_C_vs_T1<- H_C_vs_T1_trimmed[order(H_C_vs_T1_trimmed$padj),]
ordered_H_C_vs_T2<- H_C_vs_T2_trimmed[order(H_C_vs_T2_trimmed$padj),]
ordered_H_C_vs_T3<- H_C_vs_T3_trimmed[order(H_C_vs_T3_trimmed$padj),]
ordered_H_T1_vs_T2<- H_T1_vs_T2_trimmed[order(H_T1_vs_T2_trimmed$padj),]
ordered_H_T1_vs_T3<- H_T1_vs_T3_trimmed[order(H_T1_vs_T3_trimmed$padj),]
ordered_H_T2_vs_T3<- H_T2_vs_T3_trimmed[order(H_T2_vs_T3_trimmed$padj),]
write.csv(ordered_H_C_vs_T1, file = '../DESeq2/Hedophylum/H_C_VS_T1.csv',sep='')
## Warning in write.csv(ordered_H_C_vs_T1, file =
## "../DESeq2/Hedophylum/H_C_VS_T1.csv", : attempt to set 'sep' ignored
write.csv(ordered_H_C_vs_T2, file = '../DESeq2/Hedophylum/H_C_VS_T2.csv',sep='')
## Warning in write.csv(ordered_H_C_vs_T2, file =
## "../DESeq2/Hedophylum/H_C_VS_T2.csv", : attempt to set 'sep' ignored
write.csv(ordered_H_C_vs_T3, file = '../DESeq2/Hedophylum/H_C_VS_T3.csv',sep='')
## Warning in write.csv(ordered_H_C_vs_T3, file =
## "../DESeq2/Hedophylum/H_C_VS_T3.csv", : attempt to set 'sep' ignored
write.csv(ordered_H_T1_vs_T2, file = '../DESeq2/Hedophylum/H_T1_VS_T2.csv',sep='')
## Warning in write.csv(ordered_H_T1_vs_T2, file =
## "../DESeq2/Hedophylum/H_T1_VS_T2.csv", : attempt to set 'sep' ignored
write.csv(ordered_H_T1_vs_T2, file ='../DESeq2/Hedophylum/H_T1_VS_T2.csv',sep='')
## Warning in write.csv(ordered_H_T1_vs_T2, file =
## "../DESeq2/Hedophylum/H_T1_VS_T2.csv", : attempt to set 'sep' ignored
write.csv(ordered_H_T1_vs_T3, file ='../DESeq2/Hedophylum/H_T1_VS_T3.csv',sep='')
## Warning in write.csv(ordered_H_T1_vs_T3, file =
## "../DESeq2/Hedophylum/H_T1_VS_T3.csv", : attempt to set 'sep' ignored
write.csv(ordered_H_T2_vs_T3, file ='../DESeq2/Hedophylum/H_T2_VS_T3.csv',sep='')
## Warning in write.csv(ordered_H_T2_vs_T3, file =
## "../DESeq2/Hedophylum/H_T2_VS_T3.csv", : attempt to set 'sep' ignored
heatmap_genes <- function(candidateGenes,vsd,species,genesType,colNames) {
listGenes <- candidateGenes$genes
listGenes2 <- which(rownames(vsd) %in% listGenes)
index <- which(listGenes %in% rownames(vsd))
candidateGenes2 <- candidateGenes[index, ]
listProt <- candidateGenes2$pfam_annotation
listGenes3 <- candidateGenes2$genes
vsdCandidate <- vsd[listGenes3, ]
colnames(vsdCandidate) <- colNames
rownames(vsdCandidate) <- listProt
topVarGenesVsd <- head(order(rowVars(assay(vsdCandidate)), decreasing=TRUE), 50 )
heatmap.2(assay(vsdCandidate)[topVarGenesVsd,], trace="none",scale="row",keysize=1.15,key.xlab = "",
key.title = "",
col=colorRampPalette(rev(brewer.pal(11,"PuOr")))(255), cexRow=0.6, cexCol=0.7,density.info="none",
ylab="PFAM annotation of expressed genes",Colv = F,margins = c(6, 7))
main=paste("Expression level of genes related to",genesType,"in",species,sep= " ")
title(main, cex.main = 0.9)
}
stressListSaccharina<-read.csv('../DESeq2/Saccharina/stressList.csv',header=T,sep=',')
metaboListSaccharina<-read.csv('../DESeq2/Saccharina/metaboList.csv',header=T,sep=',')
transportListSaccharina<-read.csv('../DESeq2/Saccharina/transportList.csv',header=T,sep=',')
photoListSaccharina<-read.csv('../DESeq2/Saccharina/photoList.csv',header=T,sep=',')
signalingListSaccharina<-read.csv('../DESeq2/Saccharina/signalingList.csv',header=T,sep=',')
geneticListSaccharina<-read.csv('../DESeq2/Saccharina/geneticList.csv',header=T,sep=',')
cytoskeletonListSaccharina<-read.csv('../DESeq2/Saccharina/cytoskeletonList.csv',header=T,sep=',')
colnamesSaccharina <- c('C_M0','C_M1','C_M2','T1_M1','T1_M2','T2_M1','T3_M1','T3_M2')
heatmap_genes(cytoskeletonListSaccharina,vsdSaccharina,'Saccharina','cytoskeleton',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(geneticListSaccharina,vsdSaccharina,'Saccharina','transcription / translation',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(metaboListSaccharina,vsdSaccharina,'Saccharina','metabolism',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(signalingListSaccharina,vsdSaccharina,'Saccharina','signaling',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(transportListSaccharina,vsdSaccharina,'Saccharina','transport',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(stressListSaccharina,vsdSaccharina,'Saccharina','stress',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(photoListSaccharina,vsdSaccharina,'Saccharina','respiration',colnamesSaccharina)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

stressListHedophylum<-read.csv('../DESeq2/Hedophylum/stressList.csv',header=T,sep=',')
metaboListHedophylum<-read.csv('../DESeq2/Hedophylum/metaboList.csv',header=T,sep=',')
transportListHedophylum<-read.csv('../DESeq2/Hedophylum/transportList.csv',header=T,sep=',')
photoListHedophylum<-read.csv('../DESeq2/Hedophylum/photoList.csv',header=T,sep=',')
signalingListHedophylum<-read.csv('../DESeq2/Hedophylum/signalingList.csv',header=T,sep=',')
geneticListHedophylum<-read.csv('../DESeq2/Hedophylum/geneticList.csv',header=T,sep=',')
cytoskeletonListHedophylum<-read.csv('../DESeq2/Hedophylum/cytoskeletonList.csv',header=T,sep=',')
colnamesHedophylum <- c('C_M0','C_M2','T1_M1','T1_M2','T2_M1','T2_M2','T3_M0','T3_M2')
heatmap_genes(cytoskeletonListHedophylum,vsdHedophylum,'Hedophylum','cytoskeleton',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(geneticListHedophylum,vsdHedophylum,'Hedophylum','transcription / translation',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(metaboListHedophylum,vsdHedophylum,'Hedophylum','metabolism',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(signalingListHedophylum,vsdHedophylum,'Hedophylum','signaling',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(transportListHedophylum,vsdHedophylum,'Hedophylum','transport',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(stressListHedophylum,vsdHedophylum,'Hedophylum','stress',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

heatmap_genes(photoListHedophylum,vsdHedophylum,'Hedophylum','respiration',colnamesHedophylum)
## Warning in heatmap.2(assay(vsdCandidate)[topVarGenesVsd, ], trace = "none", :
## Discrepancy: Colv is FALSE, while dendrogram is `both'. Omitting column
## dendogram.

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] EnhancedVolcano_1.12.0 ashr_2.2-54
## [3] apeglm_1.16.0 tximport_1.22.0
## [5] ggvenn_0.1.10 pairwiseAdonis_0.4.1
## [7] cluster_2.1.4 limma_3.50.3
## [9] dplyr_1.1.2 vegan_2.6-4
## [11] lattice_0.21-8 permute_0.9-7
## [13] gplots_3.1.3 genefilter_1.76.0
## [15] RColorBrewer_1.1-3 pheatmap_1.0.12
## [17] markdown_1.7 ggrepel_0.9.3
## [19] ggplot2_3.4.2 BiocManager_1.30.20
## [21] devtools_2.4.5 usethis_2.1.6
## [23] vsn_3.62.0 adegenet_2.1.10
## [25] ade4_1.7-22 DESeq2_1.34.0
## [27] SummarizedExperiment_1.24.0 Biobase_2.54.0
## [29] MatrixGenerics_1.6.0 matrixStats_0.63.0
## [31] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1
## [33] IRanges_2.28.0 S4Vectors_0.32.4
## [35] BiocGenerics_0.40.0 goseq_1.46.0
## [37] geneLenDataBase_1.30.0 BiasedUrn_2.0.9
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.3 tidyselect_1.2.0 RSQLite_2.3.1
## [4] AnnotationDbi_1.56.2 htmlwidgets_1.6.2 BiocParallel_1.28.3
## [7] munsell_0.5.0 preprocessCore_1.56.0 miniUI_0.1.1.1
## [10] withr_2.5.0 colorspace_2.1-0 filelock_1.0.2
## [13] ggalt_0.4.0 knitr_1.43 rstudioapi_0.14
## [16] Rttf2pt1_1.3.12 labeling_0.4.2 bbmle_1.0.25
## [19] GenomeInfoDbData_1.2.7 mixsqp_0.3-48 farver_2.1.1
## [22] bit64_4.0.5 coda_0.19-4 vctrs_0.6.2
## [25] generics_0.1.3 xfun_0.39 BiocFileCache_2.2.1
## [28] R6_2.5.1 ggbeeswarm_0.7.2 invgamma_1.1
## [31] locfit_1.5-9.7 bitops_1.0-7 cachem_1.0.8
## [34] DelayedArray_0.20.0 vroom_1.6.3 promises_1.2.0.1
## [37] BiocIO_1.4.0 scales_1.2.1 beeswarm_0.4.0
## [40] gtable_0.3.3 ash_1.0-15 affy_1.72.0
## [43] processx_3.8.1 rlang_1.1.1 splines_4.1.2
## [46] extrafontdb_1.0 rtracklayer_1.54.0 hexbin_1.28.3
## [49] yaml_2.3.7 reshape2_1.4.4 GenomicFeatures_1.46.5
## [52] httpuv_1.6.11 extrafont_0.19 tools_4.1.2
## [55] affyio_1.64.0 ellipsis_0.3.2 jquerylib_0.1.4
## [58] sessioninfo_1.2.2 Rcpp_1.0.10 plyr_1.8.8
## [61] progress_1.2.2 zlibbioc_1.40.0 purrr_1.0.1
## [64] RCurl_1.98-1.12 ps_1.7.5 prettyunits_1.1.1
## [67] urlchecker_1.0.1 fs_1.6.2 magrittr_2.0.3
## [70] truncnorm_1.0-9 mvtnorm_1.1-3 SQUAREM_2021.1
## [73] pkgload_1.3.2 hms_1.1.3 mime_0.12
## [76] evaluate_0.21 xtable_1.8-4 XML_3.99-0.14
## [79] emdbook_1.3.12 compiler_4.1.2 biomaRt_2.50.3
## [82] bdsmatrix_1.3-6 maps_3.4.1 tibble_3.2.1
## [85] KernSmooth_2.23-21 crayon_1.5.2 htmltools_0.5.5
## [88] tzdb_0.4.0 mgcv_1.8-42 later_1.3.1
## [91] geneplotter_1.72.0 DBI_1.1.3 proj4_1.0-12
## [94] dbplyr_2.3.2 MASS_7.3-60 rappdirs_0.3.3
## [97] readr_2.1.4 Matrix_1.5-1 cli_3.6.1
## [100] parallel_4.1.2 igraph_1.4.2 pkgconfig_2.0.3
## [103] GenomicAlignments_1.30.0 numDeriv_2016.8-1.1 xml2_1.3.4
## [106] annotate_1.72.0 vipor_0.4.5 bslib_0.4.2
## [109] XVector_0.34.0 stringr_1.5.0 callr_3.7.3
## [112] digest_0.6.31 Biostrings_2.62.0 rmarkdown_2.21
## [115] restfulr_0.0.15 curl_5.0.0 shiny_1.7.4
## [118] Rsamtools_2.10.0 gtools_3.9.4 rjson_0.2.21
## [121] lifecycle_1.0.3 nlme_3.1-162 jsonlite_1.8.4
## [124] seqinr_4.2-30 fansi_1.0.4 pillar_1.9.0
## [127] ggrastr_1.0.1 KEGGREST_1.34.0 fastmap_1.1.1
## [130] httr_1.4.6 pkgbuild_1.4.0 survival_3.5-5
## [133] GO.db_3.14.0 glue_1.6.2 remotes_2.4.2
## [136] png_0.1-8 bit_4.0.5 stringi_1.7.12
## [139] sass_0.4.6 profvis_0.3.8 blob_1.2.4
## [142] caTools_1.18.2 memoise_2.0.1 irlba_2.3.5.1
## [145] ape_5.7-1